# -*- coding=utf-8 -*-
import urllib.request
import re

def getHtml(url):
    """Fetch *url* and return its body as text.

    The raw response bytes are decoded as UTF-8 and every non-breaking
    space (U+00A0) is replaced with an ordinary space.

    Args:
        url: Address handed unchanged to ``urllib.request.urlopen``.

    Returns:
        The decoded page content as ``str``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response (and its underlying
    # connection) even when read() raises, instead of leaving the cleanup
    # to garbage collection.
    with urllib.request.urlopen(url) as page:
        html = page.read()
    return html.decode('utf-8').replace('\xa0', ' ')

def println(html):
    """Append the text of every ``<b>...</b>`` span in *html* to ``text.txt``.

    Matching is non-greedy and ``.`` does not match a newline, so only
    spans that open and close on the same line are captured. Each capture
    is written followed by ``'\\n\\r'``. When nothing matches, the file is
    neither created nor touched.

    Args:
        html: Page text to scan.

    Returns:
        None. The only effect is the append to ``text.txt`` in the current
        working directory.
    """
    matches = re.findall(r'<b>(.*?)</b>', html)
    if not matches:
        # Opening in append mode would create an empty file; skip that so a
        # page without matches leaves the filesystem unchanged.
        return
    # Open once per call (not once per match) and let the context manager
    # close the handle even if a write fails. The encoding is pinned to
    # UTF-8 so the output does not depend on the platform's locale.
    with open(r'text.txt', 'a', encoding='utf-8') as out:
        # NOTE(review): '\n\r' is an unusual terminator (not '\r\n'); it is
        # kept as-is so the output format does not change -- confirm intent.
        out.writelines(match + '\n\r' for match in matches)

# Crawl driver: visit list pages 1..94 in order, fetch each one with
# getHtml() and hand the text to println(), printing progress as it goes.
print('请稍等正在爬取中...')
y = 1  # 1-based page counter used only in the progress message
for x in range(1, 95):
    print('正在爬取第%s页数据' % y)
    url = 'http://www.hbei.com.cn/news/news/newslist_%s.html' % x
    page_html = getHtml(url)
    println(page_html)
    y = y + 1
print('爬取完成')